Food Waste Characterization Preliminary Analysis

This script is meant to analyze the characteristics of the food waste data.

# Load the library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(vegan)
## Warning: package 'vegan' was built under R version 4.4.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.4.3
## Loading required package: lattice
library(Polychrome)
## Warning: package 'Polychrome' was built under R version 4.4.3
# Make the black-and-white ggplot2 theme (base_size = 16) the default for
# every ggplot drawn after this point.
theme_set(theme_bw(base_size = 16))

### bring in the data

# Read the characterization table. read.csv() already assumes a header row
# and comma separators, so only the path has to be supplied.
df <- read.csv("Dataset/Food_waste_characterization.csv")

As always, we should look at the dimensions and check that everything loaded properly.

# Report the number of rows and columns in the loaded table.
dim(df)
## [1] 88 17
# Show each column's type together with its first few values.
glimpse(df)
## Rows: 88
## Columns: 17
## $ num               <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ G.num             <chr> "DP.1", "DP.2", "DP.3", "DP.4", "DP.5", "DP.6", "FOG…
## $ Food.Wastes       <chr> "Cheese", "Milk", "Baby milk", "Yogurt/yogurt drink"…
## $ Food.Wastes.Clean <chr> "Cheese", "Milk", "Baby_milk", "Yogurt_yogurt_drink"…
## $ Group             <chr> "DP", "DP", "DP", "DP", "DP", "DP", "FOG", "FOG", "F…
## $ pH                <dbl> 5.93, 6.76, 7.15, 4.30, 6.37, 7.23, 3.50, 3.20, 3.01…
## $ TS.Perc           <dbl> 49.86, 10.96, 11.38, 30.90, 39.56, 88.58, 75.40, 99.…
## $ VS.Perc           <dbl> 42.63, 10.56, 10.89, 30.30, 37.27, 88.22, 71.63, 98.…
## $ TOC               <dbl> 29.80, 5.39, 4.41, 12.80, 51.00, 74.90, 86.10, 74.63…
## $ TKN               <dbl> 2.90, 1.66, 1.95, 0.91, 1.54, 0.51, 0.35, 0.22, 0.69…
## $ Fat.Perc          <dbl> 23.20, 15.60, 23.80, 5.10, 19.50, 83.40, 100.00, 100…
## $ Protein.Perc      <dbl> 18.50, 33.40, 10.35, 14.30, 4.50, 3.27, 0.00, 0.00, …
## $ Carbohydrate.Perc <dbl> 58.30, 51.00, 65.85, 80.60, 76.00, 13.33, 0.00, 0.00…
## $ TP                <dbl> 1.14, 1.70, 1.06, 0.70, 1.09, 2.30, 0.01, 0.00, 0.00…
## $ TK                <dbl> 0.17, 1.10, 1.69, 0.14, 0.60, 1.50, 0.00, 0.01, 0.00…
## $ C.N.Mixture       <dbl> 17.38, 3.32, 17.30, 14.06, 17.31, 17.38, 48.50, 26.9…
## $ BMP               <dbl> 561.0, 231.0, 315.0, 450.0, 591.0, 660.0, 586.0, 648…
# looks good

# Let's get a summary for completeness: quartiles and mean for the numeric
# columns, length/class/mode for the character columns.

summary(df)
##       num           G.num           Food.Wastes        Food.Wastes.Clean 
##  Min.   : 1.00   Length:88          Length:88          Length:88         
##  1st Qu.:22.75   Class :character   Class :character   Class :character  
##  Median :44.50   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :44.50                                                           
##  3rd Qu.:66.25                                                           
##  Max.   :88.00                                                           
##     Group                 pH           TS.Perc         VS.Perc     
##  Length:88          Min.   :2.630   Min.   : 1.52   Min.   : 1.51  
##  Class :character   1st Qu.:4.965   1st Qu.:19.64   1st Qu.:15.94  
##  Mode  :character   Median :6.085   Median :40.08   Median :31.70  
##                     Mean   :5.772   Mean   :48.88   Mean   :40.86  
##                     3rd Qu.:6.737   3rd Qu.:88.58   3rd Qu.:69.71  
##                     Max.   :7.850   Max.   :99.95   Max.   :99.75  
##       TOC             TKN            Fat.Perc        Protein.Perc   
##  Min.   : 3.50   Min.   :0.0400   Min.   :  0.000   Min.   : 0.000  
##  1st Qu.:12.50   1st Qu.:0.8225   1st Qu.:  1.075   1st Qu.: 7.168  
##  Median :29.29   Median :1.6000   Median :  7.300   Median :15.320  
##  Mean   :30.94   Mean   :1.9482   Mean   : 26.566   Mean   :16.221  
##  3rd Qu.:44.62   3rd Qu.:2.8925   3rd Qu.: 55.733   3rd Qu.:23.773  
##  Max.   :86.10   Max.   :6.1500   Max.   :100.000   Max.   :43.200  
##  Carbohydrate.Perc       TP               TK          C.N.Mixture   
##  Min.   : 0.00     Min.   :0.0000   Min.   : 0.000   Min.   : 2.08  
##  1st Qu.:18.38     1st Qu.:0.1775   1st Qu.: 0.180   1st Qu.:16.12  
##  Median :69.86     Median :0.4900   Median : 0.730   Median :18.02  
##  Mean   :57.11     Mean   :0.6369   Mean   : 1.387   Mean   :19.46  
##  3rd Qu.:82.81     3rd Qu.:0.9425   3rd Qu.: 2.138   3rd Qu.:21.90  
##  Max.   :99.40     Max.   :3.6600   Max.   :10.450   Max.   :48.50  
##       BMP        
##  Min.   : 216.0  
##  1st Qu.: 372.2  
##  Median : 440.0  
##  Mean   : 471.6  
##  3rd Qu.: 524.8  
##  Max.   :1476.0

The first thing we want to do is understand the mass of each substrate component in every food waste item. For this we assume that TS.Perc represents the total mass of digestible substrate; from it we can calculate the mass of each component (fat, protein, and carbohydrate).

example:

Mass Carbohydrate = (Standard Mass (1000 g) * TS.Perc) * Carbohydrate.Perc, with TS.Perc and Carbohydrate.Perc expressed as fractions (0–1)

# Rescale the percentage columns from the 0-100 scale to 0-1 fractions so
# they can be multiplied directly by a mass.

df.p <- df %>%
  mutate(
    across(
      c(TS.Perc, VS.Perc, Fat.Perc, Protein.Perc, Carbohydrate.Perc),
      \(x) x / 100
    )
  )

# Express everything relative to a reference sample of 1000 g of food waste.

kg <- 1000

# TS.mass is the total-solids mass in the reference sample; each component
# mass is that component's fraction of TS.mass. Arguments of a single
# mutate() call are evaluated in order, so TS.mass is available to the
# expressions that follow it.
df.p <- df.p %>%
  mutate(
    TS.mass = kg * TS.Perc,
    Fat.mass = TS.mass * Fat.Perc,
    Protein.mass = TS.mass * Protein.Perc,
    Carbohydrate.mass = TS.mass * Carbohydrate.Perc
  )

And with that we can start making plots to visualize the different food wastes

We will begin with histograms showing the frequency distribution of the fat, protein, and carbohydrate fractions across the food wastes.

# Frequency distribution of the fat fraction across all food wastes.
hist(df.p$Fat.Perc)

# Frequency distribution of the protein fraction.
hist(df.p$Protein.Perc)

# Frequency distribution of the carbohydrate fraction.
hist(df.p$Carbohydrate.Perc)

We can melt the data so it is easier to use in ggplot as well

# Long-format copy of the component fractions: the identifier columns are
# kept and the three fraction columns are stacked into `variable` / `value`.
df.m <- df.p %>%
  select(
    G.num, Group, Food.Wastes,
    Fat.Perc, Protein.Perc, Carbohydrate.Perc
  ) %>%
  melt(id.vars = c("G.num", "Food.Wastes", "Group"))

# Component fractions per waste group, one dodged bar per component.
# NOTE(review): the table has 88 rows, so most groups contribute several
# rows per component and their bars are drawn in the same dodge slot --
# confirm whether a per-group summary (e.g. mean) was intended.
ggplot(data = df.m, aes(x = Group, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge")

# Same reshaping for the component masses (per 1000 g reference sample).
df.mm <- df.p %>%
  select(
    G.num, Group, Food.Wastes,
    Fat.mass, Protein.mass, Carbohydrate.mass
  ) %>%
  melt(id.vars = c("G.num", "Food.Wastes", "Group"))

ggplot(data = df.mm, aes(x = Group, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge")

# 3D scatter of the component masses, one marker per food waste, coloured
# by waste group.
plot_ly(
  x = df.p$Fat.mass,
  y = df.p$Protein.mass,
  z = df.p$Carbohydrate.mass,
  color = df.p$Group,
  type = "scatter3d",
  mode = "markers"
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# 3D scatter of the component fractions, one marker per food waste,
# coloured by waste group.
plot_ly(
  x = df.p$Fat.Perc,
  y = df.p$Protein.Perc,
  z = df.p$Carbohydrate.Perc,
  color = df.p$Group,
  type = "scatter3d",
  mode = "markers"
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Labelled 3D scatter of the component fractions: carbohydrate on x,
# protein on y, fat on z, coloured by waste group.
fig <- plot_ly(
  df.p,
  x = ~Carbohydrate.Perc,
  y = ~Protein.Perc,
  z = ~Fat.Perc,
  color = ~Group
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = '% Carbohydrates'),
      yaxis = list(title = '% Proteins'),
      zaxis = list(title = '% Fat')
    )
  )

fig
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Ordinate the food wastes by their fat / protein / carbohydrate percentages
# using non-metric multidimensional scaling on Bray-Curtis distances.
pc <- select(
  df,
  G.num, Food.Wastes, Group,
  Fat.Perc, Protein.Perc, Carbohydrate.Perc
)

# Keep only the composition columns (everything after the three identifiers)
# and hand them to metaMDS() as a numeric matrix.
com <- pc[, c("Fat.Perc", "Protein.Perc", "Carbohydrate.Perc")]

m_com <- as.matrix(com)

# Fix the RNG state before metaMDS(), which tries random starts.
set.seed(666)
nmds <- metaMDS(m_com, distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.02537536 
## Run 1 stress 0.02537532 
## ... New best solution
## ... Procrustes: rmse 3.78026e-05  max resid 0.0001308946 
## ... Similar to previous best
## Run 2 stress 0.02537538 
## ... Procrustes: rmse 3.490184e-05  max resid 0.0001190826 
## ... Similar to previous best
## Run 3 stress 0.02537537 
## ... Procrustes: rmse 4.19713e-05  max resid 0.0001460885 
## ... Similar to previous best
## Run 4 stress 0.02537538 
## ... Procrustes: rmse 3.000317e-05  max resid 0.0001042045 
## ... Similar to previous best
## Run 5 stress 0.02537532 
## ... Procrustes: rmse 8.910618e-06  max resid 3.048899e-05 
## ... Similar to previous best
## Run 6 stress 0.02537533 
## ... Procrustes: rmse 2.542539e-05  max resid 8.824816e-05 
## ... Similar to previous best
## Run 7 stress 0.02537537 
## ... Procrustes: rmse 2.848793e-05  max resid 0.0001014179 
## ... Similar to previous best
## Run 8 stress 0.02537543 
## ... Procrustes: rmse 4.703526e-05  max resid 0.000162448 
## ... Similar to previous best
## Run 9 stress 0.02537536 
## ... Procrustes: rmse 2.941745e-05  max resid 0.0001020464 
## ... Similar to previous best
## Run 10 stress 0.02537534 
## ... Procrustes: rmse 2.198387e-05  max resid 7.540184e-05 
## ... Similar to previous best
## Run 11 stress 0.0253754 
## ... Procrustes: rmse 5.040864e-05  max resid 0.0001744206 
## ... Similar to previous best
## Run 12 stress 0.02537541 
## ... Procrustes: rmse 4.396229e-05  max resid 0.0001511795 
## ... Similar to previous best
## Run 13 stress 0.02537541 
## ... Procrustes: rmse 4.147091e-05  max resid 0.000142173 
## ... Similar to previous best
## Run 14 stress 0.02537532 
## ... Procrustes: rmse 5.970192e-06  max resid 2.544822e-05 
## ... Similar to previous best
## Run 15 stress 0.0253754 
## ... Procrustes: rmse 5.032362e-05  max resid 0.0001750842 
## ... Similar to previous best
## Run 16 stress 0.02537542 
## ... Procrustes: rmse 4.644215e-05  max resid 0.0001589299 
## ... Similar to previous best
## Run 17 stress 0.02537537 
## ... Procrustes: rmse 3.89534e-05  max resid 0.0001352629 
## ... Similar to previous best
## Run 18 stress 0.02537541 
## ... Procrustes: rmse 4.509028e-05  max resid 0.000156938 
## ... Similar to previous best
## Run 19 stress 0.02537537 
## ... Procrustes: rmse 3.0038e-05  max resid 0.0001037159 
## ... Similar to previous best
## Run 20 stress 0.0253754 
## ... Procrustes: rmse 4.138958e-05  max resid 0.0001415059 
## ... Similar to previous best
## *** Best solution repeated 20 times
# Print the fitted ordination: call, data transformation, distance,
# number of dimensions and final stress.
nmds
## 
## Call:
## metaMDS(comm = m_com, distance = "bray") 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(m_com)) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.02537532 
## Stress type 1, weak ties
## Best solution was repeated 20 times in 20 tries
## The best solution was from try 1 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(m_com))'
# Base-graphics overview of the ordination.
plot(nmds)

# Site coordinates (one row per food waste) with the identifier columns
# from `pc` attached, so points can be coloured and labelled.
data.scores <- as.data.frame(scores(nmds)$sites)

data.scores[c("G.num", "Food.Wastes", "Group")] <-
  pc[c("G.num", "Food.Wastes", "Group")]

ggplot(data.scores, aes(x = NMDS1, y = NMDS2)) +
  geom_point(aes(colour = Group), size = 3)

# List the distinct waste groups; their count is the number of colours the
# manual palette below has to provide.
unique(data.scores$Group)
##  [1] "DP"   "FOG"  "IC"   "FAV"  "CCG"  "CP"   "BW"   "MP"   "FP"   "EP"  
## [11] "SSG"  "SS"   "BEV"  "RERW" "OT"
# Build a palette with one distinct colour per waste group, grown from the
# red / green / blue seed colours. The size is taken from the data instead of
# a hard-coded 15, so the plot keeps working if groups are added; it is never
# smaller than the number of seed colours.
seed_colours <- c("#ff0000", "#00ff00", "#0000ff")
n_groups <- length(unique(data.scores$Group))
P15 <- createPalette(max(n_groups, length(seed_colours)), seed_colours)

# Same NMDS scatter as before, now with the custom palette. The palette's
# names are dropped so colours are assigned to the groups by position.
ggplot(data.scores, aes(x = NMDS1, y = NMDS2)) +
  geom_point(aes(colour = Group), size = 3) +
  scale_color_manual(values = unname(P15))